################################################################################################
# Ambulance Project: Main Analytic File
# John Graves
# Created 25 May, 2016
# Latest: 23 April, 2018 (Final ReStat version)
#
################################################################################################

# Note: to install packages, use the following
#install.packages('package_name', dependencies=TRUE, repos='http://cran.us.r-project.org')

####
##
# Clearout memory and load necessary packages


rm(list=ls())

version = "5-0"
cohort = "ndscv220"

options("scipen"=15, "digits"=4)
pkg = list("haven","foreign","ggplot2","plyr","ggthemes","stringr","Hmisc","AER","dplyr","lazyeval","stargazer")
invisible(lapply(pkg, require, character.only = TRUE))
homedir = "~/hospital-quality/analysis"
setwd(homedir); list.files()



####
##
# Functions
jkf.a = jkf.h = function(x) (sum(x,na.rm=TRUE)-x) / (length(na.omit(x))-1)
demean = function(x) x-mean(x)
std = function(x) (x-mean(x,na.rm=TRUE)) / (2*sd(x,na.rm=TRUE))
sd2 = function(x) 2*sd(x,na.rm=TRUE)
orig = dummy = function(x) x

####
##
# Load Main Data File
main.file = paste0("../data/",cohort,"-analysis-small-file-ver-",version,".dta")
main  = read_stata(main.file)
main$amb_orighomescene = with(main,as.integer(amb_orighome==1 | amb_origscene==1))
hosp.id.amb.samp = unique(main$prvnumgrp)
main$distance = main$amb_miles

# Restricting to 2001 Onwards
main = main %>% filter(diag_year>=2008) %>% select(-year1,-year2,-year3,-year4,-year5,-year6,-year7,-year8,-year9)

# Getting a list of Populated ZIP Codes
zz = main %>% select(zip_med) %>% mutate(n = 1) %>% group_by(zip_med)  %>% summarise(count = sum(n)) %>% filter(count>20)
write.dta(zz,file= paste0("../data/",cohort,"-zip-codes-",version,".dta"))

####
##
# Hospital Quality Data
load("../data/aha-data-120617.Rdata")
Final <- Final %>% janitor::clean_names()
write_dta(Final,path="../data/aha-data-120617.dta")
hosp = tbl_df(Final)

# Hospital ID
hosp$prvnumgrp = hosp$provider_id

# Patient Satisfaction
hosp$satis = hosp$abs_hcahps_cmp_yr1
hosp$satisC = hosp$abs_hcahps_cmp_conc

# Process
hosp$process = hosp$abs_proc_cmp_yr1 # Composite process score (1 year lag)
hosp$processC = hosp$abs_proc_cmp_conc # Composite process score (concurrent quality)
# CMS Measure names that go into the composite process score
composite.inputs = c("AMI_2"   ,   "HF_1"  ,     "HF_2"    ,   "HF_3" ,      "PN_6"    ,   "SCIP_INF_1" ,"SCIP_INF_3")
# Condition-specific hosptial measures (look at hte 1-year lag versions)
process.other <- c("AMI_4","HF_4","PN_4","PN_2","PN_7","AMI_1")
# There are character limits on file name length; just creating versions of used quality measures that are shorter in name.
hosp$ami2 = hosp$ami_2_yr1
hosp$hf1 = hosp$hf_1_yr1
hosp$hf2 = hosp$hf_2_yr1
hosp$hf3 = hosp$hf_3_yr1
hosp$pn6 = hosp$pn_6_yr1
hosp$inf1 = hosp$scip_inf_1_yr1
hosp$inf3 = hosp$scip_inf_3_yr1
hosp$ami4 = hosp$ami_4_yr1
hosp$pn4 = hosp$pn_4_yr1
hosp$hf4 = hosp$hf_4_yr1
hosp$pn2 = hosp$pn_2_yr1
hosp$pn7 = hosp$pn_7_yr1
hosp$ami1 = hosp$ami_1_yr1

# Outcome Measures (1-year lag and concurrent)
hosp$Hmort30 = hosp$abs_mort_cmp_yr1
hosp$Hmort30C = hosp$abs_mort_cmp_conc
hosp$Hreadm30 = hosp$abs_readm_cmp_yr1
hosp$Hreadm30C = hosp$abs_readm_cmp_conc
hosp$AMImort30 = hosp$mort_30_ami_yr1
hosp$PNmort30 = hosp$mort_30_pn_yr1
hosp$HFmort30 = hosp$mort_30_hf_yr1
hosp$AMIreadm30 = hosp$readm_30_ami_yr1
hosp$PNreadm30 = hosp$readm_30_pn_yr1
hosp$HFreadm30 = hosp$readm_30_hf_yr1

# Structural Measures
hosp$volume = hosp$hospbd
hosp$teach = hosp$teach
hosp$forpr = hosp$forpr
hosp$nonpr = hosp$nonpr
hosp$gov = hosp$gov
hosp$lowpr = hosp$low_profit
hosp$hipr = hosp$high_profit
hosp$coth = hosp$coth
hosp$diag_year = hosp$year

# Adding Indicators for Whether Hospital is ain Top Decile (Used in earlier drafts)
hospq90 <- hosp %>% group_by(year) %>% summarise(satisQ90 = quantile(satis,.9,na.rm=TRUE),
                                                 processQ90 = quantile(process,.9,na.rm=TRUE),
                                                 Hmort30Q90 = quantile(Hmort30,.1,na.rm=TRUE),
                                                 Hreadm30Q90 = quantile(Hreadm30,.1,na.rm=TRUE))    
hosp <- hosp %>% inner_join(hospq90) %>% mutate(satisQ90 = as.numeric(satis>=satisQ90),
                                                 processQ90 = as.numeric(process>=processQ90),
                                                 Hmort30Q90 = as.numeric(Hmort30<=Hmort30Q90),
                                                 Hreadm30Q90 = as.numeric(Hreadm30 <= Hreadm30Q90))

######
# Composite Quality 
load("../data/composite-quality-112116.Rdata")
composite <- Final %>% tbl_df() %>% rename(provider_id = Provider.ID)
hosp <- hosp %>% left_join(composite,c("provider_id","year"))

####
##
# Main Analytic File

####
##
# Set Variables To Keep
YY = c("death30","death365","readmit30","hh30","survival","ageAtdiag")
XX.Yr = names(main)[grep("^year",names(main))]
XX.D = c("ag70t74", "ag75t79", "ag80t84", "ag85t89", "ag90t94", "ag95p", "male", "black", "race_other","num_comorbid")
XX.A = c("amb_miles","amb_als","amb_emergency","amb_iv","amb_intubate","amb_op","amb_pmt")
XX.C = names(main)[grep("como_",names(main))]
XX.Dx = names(main)[grep("dx",names(main))]
XX = c(XX.Yr,XX.D,XX.A,XX.C,XX.Dx)
II = c("prvnumgrp","amb_id","zip_med","amb_orighomescene","hrrnum","hsanum","diag_year")
#RESTAT revision list:
QQc = c("satis","process","process2","Hmort30","Hreadm30","comp_qual","comp_qual_2009")  
QQc = c(QQc,"satisC","processC","Hmort30C","Hreadm30C") # Concurrent Measures (continuous measures)
QQc = c(QQc,"ami2","hf1","hf2","hf3","pn6","inf1","inf3","ami4","hf4","pn4","pn2","pn7","ami1")
QQd = c("teach","coth","forpr","nonpr","gov","satisQ90","processQ90","Hmort30Q90","Hreadm30Q90") # Dummy measures
QQ = c(QQc,QQd)
glimpse(hosp[,QQ])

hosp12 = hosp %>% filter(year==2012) %>% select_(.dots=c("prvnumgrp",QQc))
names(hosp12)[-1] = paste0(names(hosp12)[-1],"_12")
hosp12 %>% select(prvnumgrp,process_12,process2_12) %>% filter(prvnumgrp=="440039")

QQ3 = QQ
QQ12 = names(hosp12)[-1]

# Merge Medicare and Hospital Data
AA = merge(tbl_df(main[,c(YY,XX,II, "distance")]),hosp[,c("prvnumgrp","diag_year",QQ)],by=c("prvnumgrp","diag_year")); dim(AA)
AA = merge(AA,hosp12,by=c("prvnumgrp"),all.x=TRUE); dim(AA)

QQ = c(QQ,QQ12,"distance")
QQc = c(QQc,QQ12)

# Function to look at proportion missing of each variable.
propmiss <- function(dataframe) {
  m <- sapply(dataframe, function(x) {
    data.frame(
               nmiss=sum(is.na(x)),
               n=length(x),
               propmiss=sum(is.na(x))/length(x)
               )
  })
  d <- data.frame(t(m))
  d <- sapply(d, unlist)
  d <- as.data.frame(d)
  d$variable <- row.names(d)
  row.names(d) <- NULL
  d <- cbind(d[ncol(d)],d[-ncol(d)])
  return(d[order(d$propmiss), ])
}

propmiss(AA)
AA.withmissing = AA
AA = na.omit(AA)
AA.Orig = AA
QQ.main <- QQ

####
##
# Main Sample
##
####


# Full Sample
# Standardize Quality Measures

AA.temp = AA.Orig %>% mutate_each_(funs(orig,dummy),QQ)
AA = AA.temp %>% mutate_each_(funs(std),QQc)  # JG: RESTAT - R1 adding original versions too

# These are the (unstandardized) versions for use in the RESTAT appendix table. 
AA$satis_abs = AA$satis_orig
AA$process_abs = AA$process_orig
AA$Hmort30_abs = AA$Hmort30_orig
AA$Hreadm30_abs = AA$Hreadm30_orig

AA$satis_abs_orig= AA$satis_orig
AA$process_abs_orig = AA$process_orig
AA$Hmort30_abs_orig= AA$Hmort30_orig
AA$Hreadm30_abs_orig = AA$Hreadm30_orig

# Add in quality measures on original scale.
QQ = c(QQ,"satis_abs","process_abs","Hmort30_abs","Hreadm30_abs")

# Create Instruments
AA = AA %>% group_by(amb_id) %>% mutate_each_(funs(jkf.a,dummy),QQ)
Z = names(AA)[grep("jkf.a",names(AA))]

# Demean Fixed Effects
full = c(YY,XX,QQ,Z)

# Full sample
AAdm = AA %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
save(AAdm,file=paste0("../data/analytic-file-",cohort,"-ver-",version,".Rdata"))
save(AA.Orig,file=paste0("../data/analytic-file-orig",cohort,"-ver-",version,".Rdata"))

# Create a separate sample for the 1-year mortality outcomes
AAdm1y <- AA %>% filter(diag_year!=2012) %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
save(AAdm1y,file=paste0("../data/analytic-file-",cohort,"-ver-",version,"-1year.Rdata"))

AAdmyr = AA %>% group_by(zip_med,amb_orighomescene,diag_year) %>% mutate_each_(funs(demean),full)
save(AAdmyr,file=paste0("../data/analytic-file-",cohort,"-zyear-ver-",version,".Rdata"))
AAdmyr1y <- AA %>% filter(diag_year!=2012) %>% group_by(zip_med,amb_orighomescene,diag_year) %>% mutate_each_(funs(demean),full)
save(AAdmyr1y,file=paste0("../data/analytic-file-",cohort,"-zyear-ver-",version,"-1year.Rdata"))

# Get the Providers in the Analytic File
AAhosps <- AA %>% ungroup() %>% select(prvnumgrp) %>% unique()
save(AAhosps,file=paste0("../data/hospital-sample-",cohort,"-ver-",version,".Rdata"))

#AAcomp <- AA %>% ungroup() %>% select(prvnumgrp,compm365,compm30,compr30,diag_year) %>% unique()
#save(AAcomp,file=paste0("../data/hospital-composite-measures-",cohort,"-ver-",version,".Rdata"))

# Residualized and Remeaned Version of Data
AAm = AA %>% ungroup() %>% summarise_each_(funs(mean),full)
AAr <- AAdm
quantile(AAdm$Hmort30_jkf.a,probs = c(.05,.9),na.rm=TRUE)

AAdms <- AAdm[sample(1:nrow(AAdm),1000),]
AAs <- AA[sample(1:nrow(AA),1000),]


### RESTAT SENSITIVITY CHECK: Non-CMS Sample (i.e., not AMI, PN or HF)
# the underlying sample here is the 28 conditions, which includes AMI and PN.
# so if we restrict to AMI != 1 and PN != 1 we end up with a sample
# without the 3 CMS conditions.

# Standardize Quality Measures
AA.tmp = AA.Orig %>% filter(idx_ami!=1 & idx_pn!=1) %>% mutate_each_(funs(orig,dummy),QQ.main)
AA = AA.tmp %>% filter(idx_ami!=1 & idx_pn!=1) %>% mutate_each_(funs(std),QQc)  # JG: RESTAT - R1 adding original versions too
AA.Orig  = AA.Orig %>% filter(idx_ami!=1 & idx_pn!=1) 

AA$satis_abs = AA$satis_orig
AA$process_abs = AA$process_orig
AA$Hmort30_abs = AA$Hmort30_orig
AA$Hreadm30_abs = AA$Hreadm30_orig

AA$satis_abs_orig= AA$satis_orig
AA$process_abs_orig = AA$process_orig
AA$Hmort30_abs_orig= AA$Hmort30_orig
AA$Hreadm30_abs_orig = AA$Hreadm30_orig

# Add in quality measures on original scale. 
QQ = c(QQ.main,"satis_abs","process_abs","Hmort30_abs","Hreadm30_abs")

# Create Instruments
AA = AA %>% group_by(amb_id) %>% mutate_each_(funs(jkf.a,dummy),QQ)

Z = names(AA)[grep("jkf.a",names(AA))]

# Demean Fixed Effects
full = c(YY,XX,QQ,Z)

# Full sample
save(AA.Orig,file=paste0("../data/analytic-file-orig",cohort,"no-ami-pn-hf-ver-",version,".Rdata"))
AAdm = AA %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
save(AAdm,file=paste0("../data/analytic-file-",cohort,"no-ami-pn-hf-ver-",version,".Rdata"))

# Create a separate sample for the 1-year mortality outcomes
AAdm1y <- AA %>% filter(diag_year!=2012) %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
save(AAdm1y,file=paste0("../data/analytic-file-",cohort,"no-ami-pn-hf-ver-",version,"-1year.Rdata"))

# Create Condition-Specific (AMI, PN, HF) sub-samples

if (cohort == "cms20")
  {
    ####
    ##
    # AMI Cohort
    
    # Merge Medicare and Hospital Data
    AA = na.omit(merge(tbl_df(main[,c(YY,XX,II,"distance")]),hosp[,c("prvnumgrp","diag_year",QQ3)],c("prvnumgrp","diag_year"))); dim(AA)
    AA = merge(AA,hosp12,by=c("prvnumgrp")); dim(AA)
    
    AA = filter(AA,idx_ami==1)
    # Standardize Quality Measures
    AA = AA %>% mutate_each_(funs(orig,dummy),QQ)
    AA = AA %>% mutate_each_(funs(std),QQc)
    # Create Instruments
    AA = AA %>% group_by(amb_id) %>% mutate_each_(funs(jkf.a,dummy),QQ)
    Z = names(AA)[grep("jkf.a",names(AA))]
    # Demean Fixed Effects
    full = c(YY,XX,QQ,Z)
    AA = AA %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
    save(AA,file=paste0("../data/analytic-file-",cohort,"ami--ver-",version,".Rdata"))

    ####
    ##
    # Pneumonia Cohort

    # Merge Medicare and Hospital Data
    AA = na.omit(merge(tbl_df(main[,c(YY,XX,II,"distance")]),hosp[,c("prvnumgrp","diag_year",QQ3)],c("prvnumgrp","diag_year"))); dim(AA)
    AA = merge(AA,hosp12,by=c("prvnumgrp")); dim(AA)    
    AA = filter(AA,idx_pn==1)
    # Standardize Quality Measures
    AA = AA %>% mutate_each_(funs(orig,dummy),QQ)
    AA = AA %>% mutate_each_(funs(std),QQc)
    # Create Instruments
    AA = AA %>% group_by(amb_id) %>% mutate_each_(funs(jkf.a,dummy),QQ)
    Z = names(AA)[grep("jkf.a",names(AA))]
    # Demean Fixed Effects
    full = c(YY,XX,QQ,Z)
    AA = AA %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
    save(AA,file=paste0("../data/analytic-file-",cohort,"pn--ver-",version,".Rdata"))

    ####
    ##
    # Heart Failure Cohort

    # Merge Medicare and Hospital Data
    AA = na.omit(merge(tbl_df(main[,c(YY,XX,II,"distance")]),hosp[,c("prvnumgrp","diag_year",QQ3)],c("prvnumgrp","diag_year"))); dim(AA)
    AA = merge(AA,hosp12,by=c("prvnumgrp")); dim(AA)    
    AA = filter(AA,idx_ami==0 & idx_pn==0)
    # Standardize Quality Measures
    AA = AA %>% mutate_each_(funs(orig,dummy),QQ)
    AA = AA %>% mutate_each_(funs(std),QQc)
    # Create Instruments
    AA = AA %>% group_by(amb_id) %>% mutate_each_(funs(jkf.a,dummy),QQ)
    Z = names(AA)[grep("jkf.a",names(AA))]
    # Demean Fixed Effects
    full = c(YY,XX,QQ,Z)
    AA = AA %>% group_by(zip_med,amb_orighomescene) %>% mutate_each_(funs(demean),full)
    save(AA,file=paste0("../data/analytic-file-",cohort,"hf--ver-",version,".Rdata"))
}





